本文共 3084 字,大约阅读时间需要 10 分钟。
db = $db; } function geturlfile($url) { $url = trim($url); $content = ''; if (extension_loaded('curl')) { $ch = curl_init(); curl_setopt($ch, CURLOPT_URL, $url); curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1); curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1); curl_setopt($ch, CURLOPT_HEADER, 0); $content = curl_exec($ch); curl_close($ch); } else { $content = file_get_contents($url); } return trim($content); } function get_all_url($code) { preg_match_all('/"\\' ]+)["|\\']?\\s*[^>]*>([^>]+)<\\/a>/is', $code, $arr); return array('name' => $arr[2], 'url' => $arr[1]); } function get_sub_content($str, $start, $end) { $start = trim($start); $end = trim($end); if ($start == '' || $end == '') { return $str; } $str = explode($start, $str); $str = explode($end, $str[1]); return $str[0]; } function vd($var) { echo " \\r\\n"; echo ""; }}?> geturlfile($url);//定义采集列表区间$start = '\\r\\n"; var_dump($var); echo "\\r\\n\\r\\n"; echo "';$end = '';//获取区间内的文章URL和TITLE$code = $gather->get_sub_content($html, $start, $end);$newsAry = $gather->get_all_url($code);//打印出结果//$gather->vd($newsAry);$tarGetUrl = $newsAry['url'][0];//获取目标网址HTML$html = $gather->geturlfile($tarGetUrl);//定义采集列表区间$start = '
/**
 * Fetch the HTML of a page over HTTP using cURL.
 *
 * Redirects are followed and the connection phase is limited to 10 seconds.
 *
 * @param string $url Address of the page to download.
 * @return string The response body with surrounding whitespace trimmed, or an
 *                empty string when the transfer fails.
 */
function getwebcontent($url)
{
    $ch = curl_init();
    if ($ch === false) {
        // No handle could be created; report it the same way as a failed transfer.
        return '';
    }

    $timeout = 10;
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);

    $response = curl_exec($ch);
    curl_close($ch);

    // curl_exec() yields false on failure. Handle it explicitly instead of
    // relying on trim() silently coercing false to an empty string.
    if ($response === false) {
        return '';
    }

    return trim($response);
}

// Get the titles and URLs
$string = getwebcontent('http://www.***.com/learn/zhunbeihuaiyun/jijibeiyun/2');

// Regex matching
如何联系我:【万里虎】www.bravetiger.cn 【QQ】3396726884 (咨询问题100元起,帮助解决问题500元起) 【博客】http://www.cnblogs.com/kenshinobiy/